In this first chunk, let’s read in the Census microdata. Here is some example code on how to read in the data, create new variables to categorize the rows of data into groups, and then summarize the data to create information about Louisville.
Our goal is to create variables for gender, age group, whether someone is a mother, whether someone is married, their level of education, their income, whether they are the head of household, and the number of children they have.
This code chunk will identify which households are homeowners vs. renters (in the homeownership variable) and which households are cost-burdened, meaning they pay more thatn 30% of their income toward rent or a mortgage (in the cost_burden variable).
There are also variables for severe cost burden (households that pay more than half of their income towards housing) and households with severe housing problems (lacking a kitchen, adequate plumbing, or an ample number of rooms for the number of people living there).
## SKIP ##
H_gen_trend<-survey_by_demog(census_microdata081122, weight_var = "HHWT", 'homeownership')
H_singFem_rank <- census_microdata081122 %>%
filter(
year == '2019',
earner_type == 'single_earner') %>%
survey_by_demog('homeownership', weight_var = "HHWT") %>%
filter(
sex == 'total',
race == 'total',
var_type == 'percent')
H_sinFem_kids <-census_microdata081122 %>%
filter(
year == '2019',
earner_type == 'single_earner',
NCHILD > 0) %>%
survey_by_demog('homeownership', weight_var = "HHWT") %>%
filter(
sex == 'total',
race == 'total',
var_type == 'percent')
H_s_Femkids_trend <- census_microdata081122 %>%
filter(earner_type == 'single_earner') %>%
survey_by_demog( weight_var = "HHWT", 'homeownership', other_grouping_vars = c("kd_pres"))
H_Fem_race <- census_microdata081122 %>%
filter(sex == 'female') %>%
survey_by_demog( weight_var = "HHWT", 'homeownership')
I_CB_earn_trend <- survey_by_demog(census_microdata081122, weight_var = "HHWT", 'cost_burden', other_grouping_vars = c('male_fem_mult_earn'))
save(H_gen_trend, H_singFem_rank, H_sinFem_kids, H_s_Femkids_trend, H_Fem_race, I_CB_earn_trend, lville_2019,
file = "clean_svybydemog_data.RData")
load("clean_svybydemog_data.RData")
#Waffle Chart
#H_gen_trend<-survey_by_demog(census_microdata081122, weight_var = "HHWT", 'homeownership')
H_gen_trend %<>%
filter(
var_type == 'percent',
race == 'total',
sex != 'total')
trend(H_gen_trend,
homeownership,
plot_title = "Homeownership by Year",
cat = 'sex',
y_title = 'Percent',
caption_text =
"Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
# H_singFem_rank <- census_microdata081122 %>%
# filter(
# year == '2019',
# earner_type == 'single_earner') %>%
# survey_by_demog('homeownership', weight_var = "HHWT") %>%
# filter(
# sex == 'total',
# race == 'total',
# var_type == 'percent')
ranking(H_singFem_rank, 'homeownership')
# H_sinFem_kids <-census_microdata081122 %>%
# filter(
# year == '2019',
# earner_type == 'single_earner',
# NCHILD > 0) %>%
# survey_by_demog('homeownership', weight_var = "HHWT") %>%
# filter(
# sex == 'total',
# race == 'total',
# var_type == 'percent')
ranking(H_sinFem_kids, 'homeownership')
# H_s_Femkids_trend <- census_microdata081122 %>%
# filter(earner_type == 'single_earner') %>%
# survey_by_demog( weight_var = "HHWT", 'homeownership', other_grouping_vars = c("kd_pres"))
H_s_Femkids_trend %<>%
filter(
var_type == 'percent',
race == 'total',
sex == "female") %>%
pivot_wider(names_from = 'kd_pres', values_from = 'homeownership') %>%
select(-sex)
trend(H_s_Femkids_trend,
kids:no_kids,
rollmean = 3,
plot_title = "Female Homeownership by Presence of Children",
cat = c("Children" = "kids", "No Children" = "no_kids"),
y_title = 'Percent',
caption_text =
"Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
#H_Fem_race <- survey_by_demog(census_microdata081122 ,weight_var = "HHWT", 'homeownership')
# H_Fem_race <- census_microdata081122 %>%
# filter(sex == 'female') %>%
# survey_by_demog( weight_var = "HHWT", 'homeownership')
H_Fem_race %<>%
filter(
var_type == 'percent',
race != 'total',
sex == 'total')
trend(H_Fem_race,
homeownership,
rollmean = 3,
pctiles = F,
plot_title = "Female With Kids Homeownership by Year",
cat = 'race',
y_title = 'Percent',
caption_text =
"Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
#################KEEP################
## create hist for each of the most three recent years -> just for verification...that there isnt anything weird
## if feeling funky, use gganimate to create a gif of histograms from 2000 to 2019
#TO DO:
# Change the x-axis to a dollar format (see trendline function in trendline_helpers.R) - done
# Label the x-axis on every $50,000? - done
# Let's use counts on the y-axis for the male and female graph - done?
# Format the y-axis with commas - done
# remove legend? - done
#p$HHINCOME %>% dollar(accuracy = 0.1, scale = .001, suffix = "k")
p <- lville_2019 %>%
filter(HHINCOME <= cut_95,
earner_type == "single_earner") %>%
func_plt_hist_overlay( "sex")
p <- p + glp_graph_theme
p <- p + labs(
title = "Male Single Earner vs Female Single Earner Dollars",
) +
ylab(" ") +
guides(color = FALSE) +
facet_wrap(~sex, nrow = 2)
p <- p +
theme(
#axis.ticks.x = element_line(size = 50000),
strip.text = element_blank()
) +
scale_x_continuous(
breaks = c(50000, 100000, 150000, 200000),
label = c("$50k", "$100k", "$150k", "$200k")
) +
scale_y_continuous(labels = scales::comma)
p
# TO DO
# facet this into four panels
# Either check that the y-axis become independent to get more info around other groups OR change the data from counts to percentages
sing_fem_inc_race <- lville_2019 %>%
filter(
sex == 'female',
earner_type == 'single_earner',
HHINCOME <= cut_95) %>%
func_plt_hist_overlay( "race")
sing_fem_inc_race <- sing_fem_inc_race + facet_wrap(~race, nrow = 2)
sing_fem_inc_race <- sing_fem_inc_race + glp_graph_theme
sing_fem_inc_race <- sing_fem_inc_race +
labs(
title = "Single Female Earner Income",
) +
ylab(" ") +
guides(color = FALSE)
sing_fem_inc_race <- sing_fem_inc_race +
theme(
#axis.ticks.x = element_line(size = 50000),
strip.text = element_blank()
) +
scale_x_continuous(
breaks = c(50000, 150000),
label = c("$50k", "$150k")
)
sing_fem_inc_race
# TO DO
# see above for x and y axis to-dos
# Drop the legend title (see )
cost_burden_sf <- lville_2019 %>%
filter(
sex == 'female',
earner_type == 'single_earner',
HHINCOME <= cut_95) %>%
mutate(
cost_burden = factor(cost_burden,
levels = rev(c(TRUE, FALSE)),
labels = rev(c("Cost Burdened", "Non Cost Burdened")),
ordered = TRUE))
cost_burden_sf_plot <- ggplot(cost_burden_sf,
aes(x = HHINCOME, fill = cost_burden, weights = HHWT),
color="#00A9B7",
alpha=0.5,
position = "stack",
binwidth = 10000) +
geom_histogram()
cost_burden_sf_plot <- cost_burden_sf_plot + glp_graph_theme
cost_burden_sf_plot <- cost_burden_sf_plot +
labs(
title = "Single Female Earner Cost Burden",
) +
ylab(" ") +
xlab("Household Income") +
guides(color = FALSE)
cost_burden_sf_plot <- cost_burden_sf_plot +
theme(
#axis.ticks.x = element_line(size = 50000),
strip.text = element_blank()
) +
scale_x_continuous(
breaks = c(50000, 100000, 150000, 200000),
label = c("$50k", "$100k", "$150k", "$200k")
) +
scale_y_continuous(labels = scales::comma)
cost_burden_sf_plot
#I_CB_earn_trend <- survey_by_demog(census_microdata081122, weight_var = "HHWT", 'cost_burden', other_grouping_vars = c('earner_type'), breakdowns = "sex")
# I_CB_earn_trend <- survey_by_demog(census_microdata081122, weight_var = "HHWT", 'cost_burden', other_grouping_vars = c('male_fem_mult_earn'))
I_CB_earn_trend %<>%
filter(
var_type == 'percent',
race == 'total',
sex == 'total') %>%
select( -c(sex,race)) %>%
pivot_wider(names_from = "male_fem_mult_earn", values_from = "cost_burden")
trend(I_CB_earn_trend,
multiple_earner:single_fem_earner:single_male_earner,
plot_title = "Cost Burden by Earner Type",
cat = c("Multiple Earners" = "multiple_earner", "Single Female Earner" = "single_fem_earner", "Single Male Earner" = "single_male_earner"),
y_title = 'Percent',
caption_text =
"Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
I_median_earn_age <- lville_2019 %>%
group_by(age_group, male_fem_mult_earn) %>%
summarize(Med=median(HHINCOME))
ggplot(I_median_earn_age,
aes(x=age_group, y=Med, fill = male_fem_mult_earn)) +
geom_bar(stat="identity", position='dodge')
#Do not use
ggplot(I_median_earn_age,
aes(x=age_group, y=Med, fill = male_fem_mult_earn)) +
geom_bar(stat="identity", position='fill')
E_singM_singF <- lville_2019 %>%
filter(earner_type == 'single_earner') %>%
group_by(sex, educ) %>%
summarize(n=sum(HHWT, na.rm = TRUE)) %>%
mutate(
total = sum(n),
rate = n/sum(n)*100,
educ = factor(educ,
levels = rev(c("no_hs", "hs", "some_col", "assoc", "bach","grad")),
ordered = TRUE))
ggplot(E_singM_singF,
aes(x=sex, y=rate, fill=educ)) +
geom_bar(stat="identity", position='fill')
E_singF_race <- lville_2019 %>%
filter(
sex == 'female',
earner_type == 'single_earner') %>%
group_by(race, educ) %>%
summarize(n=sum(HHWT, na.rm = TRUE)) %>%
mutate(
total = sum(n),
rate = n/sum(n)*100,
educ = factor(educ,
levels = rev(c("no_hs", "hs", "some_col", "assoc", "bach","grad")),
ordered = TRUE))
ggplot(E_singF_race, aes(x=race, y=rate, fill=educ)) +
geom_bar(stat="identity", position='fill')
#Stacked histogram
cost_burden_age_sf <- census_microdata081122 %>%
filter(year %in% 2010:2019) %>%
mutate(
cost_burden = factor(cost_burden,
levels = rev(c(TRUE, FALSE)),
labels = rev(c("Cost Burdened", "Non Cost Burdened")),
ordered = TRUE)
)
ggplot(cost_burden_age_sf,
aes(x=age_group, y=HHWT , fill=cost_burden),
color="#00A9B7") +
geom_bar(stat="identity", position='fill')
ggplot(cost_burden_age_sf,
aes(x=age_group, y=HHWT , fill=cost_burden),
color="#00A9B7") +
geom_bar(stat="identity", position='fill')+
facet_wrap(~male_fem_mult_earn)
earner_trend <- census_microdata081122 %>%
mutate(
male_fem_mult_earn = case_when(
sex == 'female' & earner_type == 'single_earner' ~ 'single_fem_earner',
sex == 'male' & earner_type == 'single_earner' ~ 'single_male_earner',
earner_type == 'multi_earner' ~ 'multiple_earner')
) %>%
group_by(year, male_fem_mult_earn) %>%
summarize(n=sum(HHWT, na.rm = TRUE)) %>%
mutate(
total = sum(n),
rate = n/sum(n)*100)
ggplot(earner_trend,
aes(x=year, y=rate, fill=male_fem_mult_earn),
color="#00A9B7") +
geom_bar(stat="identity", position='fill')